
######################################################################################################.
#Preparation script: merge Democracy Barometer with Data_merged_v4
######################################################################################################.


####################################
#Values DB 2008-2017
###################################

rm(list = ls())


db_new <- read_excel("Data/DB_data_2020_all_data_Standardised.xlsx")

#name of functions
db_function <- c("INDLIB",	"RULEOFLAW",	"PUBLIC", "COMPET",	"MUTUCONS",	"GOVCAP",	"TRANSPAR",	"PARTICIP",	"REPRES", "DQ")


rescaling <- function(x){
  x.out <- ((x - 1)*100)/(max(x, na.rm = TRUE) - 1)  #Rescale from min(1):max scale to scale [0, 100] (https://en.wikipedia.org/wiki/Feature_scaling) 
  return(x.out)
}


db_new <- db_new %>% mutate_at(db_function,rescaling)


db_new_mean <- db_new %>%
  subset(Year>2007 & Year<2018) %>% 
  group_by(Country) %>%
  summarize_at(vars(db_function), mean, na.rm = TRUE) %>% 
  rename(country_DB=Country) %>% 
  rename_at(all_of(db_function), ~ paste(all_of(db_function), "new",  sep="_") )

db_new_mean <- db_new_mean %>% 
  mutate(country_DB = case_when(
    country_DB == "Bosnia-Herzegovina" ~ "Bosnia and Herzegovina",
    TRUE ~ as.character(country_DB)
  ))

db_both <- db_new_mean

is.nan.data.frame <- function(x) #change NaN to NA
  do.call(cbind, lapply(x, is.nan))

db_both[is.nan(db_both)] <- NA

df <- read_excel("Data/Data_merged_v4.xlsx")

# 2. Join Datasets ====
df_full <- plyr::join_all(list(df,
                               db_both), by=c("country_DB"), type="left") 

write.xlsx(df_full, "Data/1_dataset.xlsx")

############################################################################################################################
#Values DB: only 2017
############################################################################################################################

rm(list = ls())
db_new <- read_excel("Data/DB_data_2020_all_data_Standardised.xlsx")

#name of functions
db_function <- c("INDLIB",	"RULEOFLAW",	"PUBLIC", "COMPET",	"MUTUCONS",	"GOVCAP",	"TRANSPAR",	"PARTICIP",	"REPRES", "DQ")



rescaling <- function(x){
  x.out <- ((x - 1)*100)/(max(x, na.rm = TRUE) - 1)  #Rescale from min(1):max scale to scale [0, 100] (https://en.wikipedia.org/wiki/Feature_scaling) 
  return(x.out)
}


db_new <- db_new %>% mutate_at(db_function,rescaling)


db_new_mean <- db_new %>%
  subset(Year==2017) %>% 
  group_by(Country) %>%
  summarize_at(vars(db_function), mean, na.rm = TRUE) %>% 
  rename(country_DB=Country) %>% 
  rename_at(all_of(db_function), ~ paste(all_of(db_function), "new",  sep="_") )

db_new_mean <- db_new_mean %>% 
  mutate(country_DB = case_when(
    country_DB == "Bosnia-Herzegovina" ~ "Bosnia and Herzegovina",
    TRUE ~ as.character(country_DB)
  ))


is.nan.data.frame <- function(x) #change NaN to NA
  do.call(cbind, lapply(x, is.nan))

db_new_mean[is.nan(db_new_mean)] <- NA

df <- read_excel("Data/Data_merged_v4.xlsx")

# 2. Join Datasets ====
df_full <- plyr::join_all(list(df,
                               db_new_mean), by=c("country_DB"), type="left") 

write.xlsx(df_full, "Data/1_dataset_DB2017.xlsx")

